#!/usr/bin/env Rscript

## Session setup.
##
## The workspace is deliberately NOT cleared here (the script used to start
## with rm(list = ls())): under Rscript the global environment is empty
## anyway, and when the script is source()d that call would destroy whatever
## the caller had loaded.

## Default since R 4.0; kept so character columns stay character on older R.
options(stringsAsFactors=FALSE)

suppressMessages(library(raster))
suppressMessages(library(plyr))
## NOTE(review): nothing in the visible part of this script uses doParallel.
suppressMessages(library(doParallel))

## Directory that holds the input files (tmp07 .. tmp14, alters*.csv).
## Leave empty to read them from the current working directory. An empty
## path must not be handed to setwd(): setwd("") always stops with
## "cannot change working directory".
dataDir <- ""
if (nzchar(dataDir)) {
    setwd(dataDir)
}

cat("\nReading voter data..\n")
## The individual-level voter record is not distributed with this script.
## Load it into DATA at this point, e.g.:
## DATA <- read.csv(
##     "formattedByIndividual_Cali3AllYears_byPID_filtered_movers_selectvars2_fixed.csv"
##     )
## (variables and observations were subsetted so the table fits in memory)

cat("\nFormatting votes..\n")
## A vote column is set to NA wherever the zip column it is paired with holds
## an empty string. Pairing used below: pg08/pp08 with zip_cali309,
## gg10/gp10 with zip_cali310, pg12/pp12 with zip_cali312 and gg14/gp14 with
## zip_cali314.
## NOTE(review): this assumes the zip_cali3YY columns are already present in
## DATA before the zip files are joined further down -- confirm.
DATA$pg08 <- replace(DATA$pg08, DATA$zip_cali309 == "", NA)
DATA$pp08 <- replace(DATA$pp08, DATA$zip_cali309 == "", NA)

DATA$gg10 <- replace(DATA$gg10, DATA$zip_cali310 == "", NA)
DATA$gp10 <- replace(DATA$gp10, DATA$zip_cali310 == "", NA)

DATA$pg12 <- replace(DATA$pg12, DATA$zip_cali312 == "", NA)
DATA$pp12 <- replace(DATA$pp12, DATA$zip_cali312 == "", NA)

DATA$gg14 <- replace(DATA$gg14, DATA$zip_cali314 == "", NA)
DATA$gp14 <- replace(DATA$gp14, DATA$zip_cali314 == "", NA)

## Attach the per-year zip tables (files tmp07, tmp09, tmp10, tmp12, tmp14)
## to DATA, one after the other, matching rows on "pid". plyr::join() is
## called with its defaults, exactly as before.
## NOTE(review): these files presumably supply the long_zip_cali3YY /
## lat_zip_cali3YY columns used by the distance step below -- confirm.
## Each table is only needed for its own join, so it is not kept around
## under a separate global name; this keeps peak memory down.
for (zipYear in c("07", "09", "10", "12", "14")) {
    cat("\nReading 20", zipYear, " zips..", sep = "")
    zipTable <- read.csv(
        paste0("tmp", zipYear)
        )

    cat("\n  joining..\n")
    DATA <- join(DATA, zipTable, by = "pid")
}
rm(zipYear, zipTable)

## Attach the "alters" tables to DATA, matching rows on "pid".
##
## Every file alters<span>.csv is read without a header row; its two columns
## are named "pid" and "sameres<span>" before the join.
## NOTE(review): the meaning of the sameres* values is not visible here.
##
## Names of alterSpans are the labels printed in the progress messages,
## values are the year spans used in the file and column names. The order is
## the order in which the columns are appended to DATA.
alterSpans <- c(
    "2007"         = "07to09",
    "2009"         = "09to10",
    "2010"         = "10to12",
    "2012"         = "12to14",
    "2007 to 2014" = "07to14",
    "2009 to 2012" = "09to12",
    "2010 to 2014" = "10to14"
    )

for (alterLabel in names(alterSpans)) {
    alterSpan <- alterSpans[[alterLabel]]

    cat("\nReading ", alterLabel, " alters..", sep = "")
    ## header spelled out in full: head= only worked through partial
    ## argument matching.
    alterTable <- read.csv(
        paste0("alters", alterSpan, ".csv"), header = FALSE
        )
    names(alterTable) <- c("pid", paste0("sameres", alterSpan))

    cat("\n  joining..\n")
    DATA <- join(DATA, alterTable, by = "pid")
}
## The tables are only needed for their own join; release them.
rm(alterSpans, alterLabel, alterSpan, alterTable)


## Row-wise distance between two coordinate pairs stored in a data frame.
##
## Args:
##   point1: character(2); names of the longitude and latitude columns (in
##           that order) of the first point.
##   point2: character(2); the same for the second point.
##   data:   data.frame containing those four columns. The default refers to
##           the global ZIPS07to09; it is only looked up when the argument is
##           omitted, so that object must exist in that case.
##
## Returns:
##   Numeric vector with one element per row of `data`, holding
##   spDistsN1(..., longlat = TRUE) between the two points of that row
##   (great-circle distance, in kilometres according to the sp
##   documentation). The element is NA when any of the four coordinates is
##   NA or exactly 0: zero is treated as a "no coordinate" marker, as it
##   always was in this function. A zero-row `data` yields numeric(0).
calculateDistances <- function(
    point1=c("long_zip_cali307","lat_zip_cali307"),
    point2=c("long_zip_cali309","lat_zip_cali309"),
    data=ZIPS07to09
    ) {
    ## Flag every row that lacks a usable value in one of the four columns.
    unusable <- rep(FALSE, nrow(data))
    for (coordCol in c(point1[1], point1[2], point2[1], point2[2])) {
        values <- data[, coordCol]
        ## TRUE | NA is TRUE, so the mask never contains NA itself.
        unusable <- unusable | is.na(values) | values == 0
    }

    ## Distances are only computed for usable rows; the rest stay NA.
    ## seq-safe: with no usable rows (or no rows at all) vapply() returns
    ## numeric(0) and spDistsN1() is never called.
    distances <- rep(NA_real_, nrow(data))
    usableRows <- which(!unusable)
    distances[usableRows] <- vapply(
        usableRows,
        function(i)
            spDistsN1(
                as.matrix(data[i, point1]),
                as.matrix(data[i, point2]),
                longlat = TRUE
                ),
        numeric(1)
        )
    distances
}


cat("\nAdding distances..")
## For each pair of consecutive snapshots (2007-2009, 2009-2010, 2010-2012,
## 2012-2014):
##   1. count the rows of DATA per unique combination of the four coordinate
##      columns (aggregate()'s formula method drops rows with a missing
##      coordinate by default),
##   2. compute the distance once per unique combination,
##   3. join count and distance back onto DATA by those four columns.
## New DATA columns per pair: movers<span>.ziptozipcount and
## move<span>.distance, e.g. movers07to09.ziptozipcount, move07to09.distance.
for (yearPair in list(
    c("07", "09"), c("09", "10"), c("10", "12"), c("12", "14")
    )) {
    fromCols <- paste0(c("long", "lat"), "_zip_cali3", yearPair[1])
    toCols <- paste0(c("long", "lat"), "_zip_cali3", yearPair[2])
    coordCols <- c(fromCols, toCols)
    span <- paste0(yearPair[1], "to", yearPair[2])

    cat(
        paste0("\n 20", yearPair[1], " to 20", yearPair[2], ".."),
        format(Sys.time(), "%a %b %d %X %Y")
        )

    ## Summing a column of ones gives the number of rows per combination.
    zipPairs <- aggregate(
        as.formula(
            paste("rep(1, nrow(DATA)) ~", paste(coordCols, collapse = " + "))
            ),
        data = DATA,
        FUN = sum
        )
    names(zipPairs)[ncol(zipPairs)] <- paste0(
        "movers", span, ".ziptozipcount"
        )
    cat("\n ",nrow(zipPairs), "zip-to-zip calculations")

    ## All arguments are passed explicitly so that no call depends on the
    ## function's defaults (one of which is bound to a global table).
    zipPairs[[paste0("move", span, ".distance")]] <- calculateDistances(
        point1 = fromCols,
        point2 = toCols,
        data = zipPairs
        )

    DATA <- join(
        DATA,
        zipPairs,
        by = coordCols
        )
}
rm(yearPair, fromCols, toCols, coordCols, span, zipPairs)

## Output step, disabled in the distributed version of the script:
## cat("\n\nWriting to file..")
## write.csv(
##     DATA,
##     "formattedByIndividual_Cali3AllYears_byPID_filtered_movers_selectvars2_withzipcodes_fixed.csv",
##     row.names=FALSE,
##     quote=FALSE
##     )

cat("\n\nDone (EOF).\n")
